* Author: Joe Tatarka
* Name: 3_2_gws_build.do
* Purpose: Combine raw GWS data into aggregate app usage per month 
* The GWS download was split into two parts: a panel of users and the app events themselves.
* This file cleans the app events and sums total usage of the delivery and driver apps by month.

* Directory globals used throughout this do-file (absolute paths on the T: drive)
global root "T:/service_industries/replication_package"

* Dataset and exhibit folders, all derived from the project root
global raw_root "${root}/datasets/raw"
global intermediate_root "${root}/datasets/intermediate"
global built_root "${root}/datasets/built"
global exhibits_root "${root}/exhibits"

* Folder holding the cleaned GWS pull (lives outside the replication package tree)
global gws_root "T:/generally_useful_datasets/gws_06_17_24_pull/clean"

*********************************************************************************
****** 1. Bring in delivery app events
********************************************************************************

* List every raw delivery-app CSV in the GWS folder
local files : dir "${gws_root}/delivery_apps" files "*.csv"

* Stop with a clear message if the folder is empty. Without this check the
* first tempfile macro is never defined and the later use command fails
* with an uninformative error.
local n_files : word count `files'
if `n_files' == 0 {
	display as error "No .csv files found in ${gws_root}/delivery_apps"
	exit 601
}

* Loop through the files, clean each one, and store it in its own tempfile
local i = 1
quietly foreach file in `files' {

	import delimited "${gws_root}/delivery_apps/`file'", clear

	* v1 is dropped unused (presumably an index column written by the export -- TODO confirm)
	drop v1

	*** Keep important variables
	* Build a daily Stata date from the separate month/day/year columns
	gen date = mdy(month, day, year)
	format date %td
	keep date visibleduration panelistid appdescription

	* Keep only the 7 characters starting at position 8 of the panelist id
	* (presumably strips a fixed prefix -- TODO confirm against the raw ids)
	replace panelistid = substr(panelistid, 8,7)
	compress panelistid

	tempfile delivery_`i'
	save `delivery_`i'', replace

	local i = `i' + 1
}

* Append the cleaned files in the order they were read
* k is the number of files processed (i was incremented once past the last one)
local k = `i' - 1
use `delivery_1', clear
quietly forval j = 2/`k' {
	* NOTE(review): force lets append proceed when a variable is string in one
	* file and numeric in another, silently setting the mismatched values to
	* missing -- confirm this is intended.
	append using `delivery_`j'', force
	* Free disk space as soon as each piece has been appended
	erase `delivery_`j''
}

********************************************************************************
****** 2. Clean Appended Data 
********************************************************************************
* Build the monthly date variable that serves as the aggregation key
sort date
generate year_month_gs = ym(year(date), month(date))
format %tm year_month_gs

*** Harmonize app names
* The Grubhub driver app was renamed partway through the sample period
replace appdescription = "GH Drivers" if appdescription == "Grubhub for Drivers"
* Both Instacart shopper app names map to a single label
replace appdescription = "Shopper" if inlist(appdescription, "Instashopper", "Instacart Shopper")
* The raw DoorDash label carries a trailing space
replace appdescription = "Doordash" if appdescription == "DoorDash "

******** Restrict to restaurant delivery and driver apps
******** (the raw pull also contains Instacart and apps from specific restaurant chains)
drop if !inlist(appdescription, "GH Drivers", "Dasher", "Fleet by Postmates", "Doordash", "Grubhub", "Postmates", "UberEATS")

*** Aggregate to one row per app per year-month.
*** Visible duration is the usage metric; other metrics exist in the data,
*** but this one seemed the most reasonable.
collapse (sum) visibleduration, by(year_month_gs appdescription)

** Convert visible duration from milliseconds to minutes
replace visibleduration = visibleduration / 60000
label variable year_month_gs "Year-Month"
label var year_month_gs "Year-Month"

***** Reshape to one row per month with one usage column per app, so that
***** total delivery and total driver app usage are simple row sums.

* App names become variable-name suffixes, so they cannot contain spaces
replace appdescription = "GH_Drivers" if appdescription == "GH Drivers"
replace appdescription = "Fleet_by_Postmates" if appdescription == "Fleet by Postmates"
rename visibleduration app_use_

* The stub must be the exact variable name (app_use_), not an abbreviation of
* it, so the wide variables are named app_use_<app> as the totals below expect.
reshape wide app_use_, i(year_month_gs) j(appdescription) string
rename *, lower

* An app with no events in a month has no row after the collapse and is
* therefore missing after the reshape. rowtotal treats those as zero usage,
* whereas adding with + would make the whole monthly total missing.
* Stored as double to match the precision of the component variables.
egen double app_use_all_delivery = rowtotal(app_use_doordash app_use_grubhub app_use_postmates app_use_ubereats)
egen double app_use_all_driver = rowtotal(app_use_gh_drivers app_use_dasher app_use_fleet_by_postmates)

********************************************************************************
**** Save Intermediate Dataset
********************************************************************************
save "${intermediate_root}/3_2_gws_app_usage.dta", replace


